suppressPackageStartupMessages(library(tidyverse))
devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities

Settings

# Root of the raw sequencing data volume.
# NOTE(review): data_dir is not referenced again in the visible part of this
# script -- confirm whether it is still needed.
data_dir <- '/Volumes/Mitsu_NGS_3/METTL2A/'

# Project root. Input and output paths below are built with paste0(wd, ...),
# so the trailing slash is required.
wd <- "/Users/shuheimitsutomi/Google Drive/My Drive/Analysis/METTL2A/"
setwd(wd)

# Output directories for the figures and tables written by this script.
figdir <- paste0(wd, 'Figures/DRS_m3C_sites/Methods/')
tabledir <- paste0(wd, 'Tables/DRS_m3C_sites/')

# NOTE(review): pval_thresh is defined here but not used in the visible code;
# the up/down/change flags are read pre-computed from the input tables.
pval_thresh <- .05

# Global ggplot2 defaults: classic theme, 7 pt base size, legend at the bottom.
theme_set(
  theme_classic(base_size = 7) +
    theme(legend.position = 'bottom')
)

Functions

# Classify every row, per parameter, by which experiment(s) flagged it.
#
# df: data frame holding, for each parameter, a pair of flag columns with the
#     suffixes `_G` and `_I` (intensity_up, intensity_down, dwell_up,
#     dwell_down, GMM_change).
# Returns df with one new column per parameter (named without suffix) whose
# value is 'common' (flagged in both), 'only G', 'only I' or 'others'.
#
# A comparison that evaluates to NA is treated by case_when() as "not
# matched", so a row whose flag is missing in one experiment can still be
# 'only G' / 'only I', and a row missing in both ends up as 'others'.
add_commonchange_info <- function(df) {

  # Shared four-way classification used for every parameter; `flag` is the
  # value that marks a significant site ('up', 'down' or 'change').
  classify_overlap <- function(in_G, in_I, flag) {
    case_when(
      in_G == flag & in_I == flag ~ 'common',
      in_G == flag ~ 'only G',
      in_I == flag ~ 'only I',
      .default = 'others'
    )
  }

  df |>
    mutate(
      intensity_up   = classify_overlap(intensity_up_G, intensity_up_I, 'up'),
      intensity_down = classify_overlap(
        intensity_down_G, intensity_down_I, 'down'
      ),
      dwell_up       = classify_overlap(dwell_up_G, dwell_up_I, 'up'),
      dwell_down     = classify_overlap(dwell_down_G, dwell_down_I, 'down'),
      GMM_change     = classify_overlap(GMM_change_G, GMM_change_I, 'change')
    )

}

# Add a `percentage` column: each row's `n` as a percentage of the total `n`
# of its `params` group.
#
# df: data frame with a `params` column and a count column `n`.
# Returns df with `percentage` added; the result is ungrouped.
#
# Missing counts are skipped when the group total is computed, so a single NA
# (e.g. a category absent for one parameter) no longer turns every percentage
# of that group into NA. Rows whose own `n` is NA still get an NA percentage.
calc_method_percentage <- function(df) {

  df |>
    group_by(params) |>
    mutate(percentage = 100 * n / sum(n, na.rm = TRUE)) |>
    # Drop the grouping so it does not leak into downstream verbs.
    ungroup()

}

# Convert `sig_in` and `params` to factors with a fixed level order, which
# controls the ordering used when the table is plotted.
#
# df: data frame with character (or factor) columns `sig_in` and `params`.
# Returns df with both columns replaced by factors.
change_factor_levels <- function(df) {

  sig_in_level_order <- c('only G', 'common', 'only I', 'others')
  params_level_order <- c(
    'intensity_up', 'intensity_down', 'dwell_up', 'dwell_down', 'GMM_change'
  )

  mutate(
    df,
    sig_in = factor(sig_in, levels = sig_in_level_order),
    params = factor(params, levels = params_level_order)
  )
}

# Annotate each k-mer with its third character and whether that character is C.
#
# df: data frame with a `ref_kmer` column.
# Returns df with two new columns:
#   middle_base - the 3rd character of ref_kmer (the centre of a 5-mer)
#   middle_isC  - 'C' when middle_base is C, otherwise 'others'
add_middle_Cinfo <- function(df) {

  mutate(
    df,
    middle_base = str_sub(ref_kmer, start = 3, end = 3),
    middle_isC = ifelse(test = middle_base == 'C', yes = 'C', no = 'others')
  )

}

# Flag k-mers that carry a run of C's around the centre and summarise the
# result in a single `middleC_info` column.
#
# df: data frame with `ref_kmer` and the `middle_isC` column produced by
#     add_middle_Cinfo(); the latter must already be present.
# Returns df with three new columns:
#   have_CC_middle - 'yes' when either CC pattern below matches, else 'no'
#   have_C3_middle - 'yes' when the CCC pattern matches, else 'no'
#   middleC_info   - 'C3', 'C2', 'C1' or 'others' (first matching rule wins),
#                    NA when none of the rules applies
#
# The patterns are unanchored; on a 5-character k-mer they can only match CC
# at positions 2-3 or 3-4 and CCC at positions 2-4.
add_consecutive_C_info <- function(df) {

  mutate(
    df,
    have_CC_middle = ifelse(
      test = grepl(pattern = '.{1}CC.{2}', x = ref_kmer) |
        grepl(pattern = '.{2}CC.{1}', x = ref_kmer),
      yes = 'yes',
      no = 'no'
    ),
    have_C3_middle = ifelse(
      test = grepl(pattern = '.{1}CCC.{1}', x = ref_kmer),
      yes = 'yes',
      no = 'no'
    ),
    # Longest run first, so a CCC k-mer is reported as C3 rather than C2.
    middleC_info = case_when(
      have_C3_middle == 'yes' ~ 'C3',
      have_CC_middle == 'yes' ~ 'C2',
      middle_isC == 'C' ~ 'C1',
      middle_isC == 'others' ~ 'others',
      .default = NA
    )
  )

}

Read data

# Per-position comparison results for the G experiment.
sampcomp_G <- read_tsv(
  paste0(
    wd,
    'Tables/DRS_sampcomp_results/siMETTL2A_G_sampcomp_results_2024-04-07.tsv.gz'
  )
)
## Rows: 5884004 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (10): transcript_id, ref_kmer, GMM_cov_type, cluster_counts, Logit_LOR, ...
## dbl (17): position, GMM_logit_pvalue, KS_dwell_pvalue, KS_intensity_pvalue, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sampcomp_G
## # A tibble: 5,884,004 × 27
##    position transcript_id     ref_kmer GMM_logit_pvalue KS_dwell_pvalue
##       <dbl> <chr>             <chr>               <dbl>           <dbl>
##  1     1464 ENST00000264926.7 TCACA                  NA               1
##  2     1465 ENST00000264926.7 CACAT                   1               1
##  3     1466 ENST00000264926.7 ACATA                  NA               1
##  4     1467 ENST00000264926.7 CATAA                   1               1
##  5     1468 ENST00000264926.7 ATAAA                  NA               1
##  6     1473 ENST00000264926.7 AACGA                   1               1
##  7     1475 ENST00000264926.7 CGATC                  NA               1
##  8     1486 ENST00000264926.7 ACACA                  NA               1
##  9     1501 ENST00000264926.7 CAAGA                   1               1
## 10     1502 ENST00000264926.7 AAGAC                  NA               1
## # ℹ 5,883,994 more rows
## # ℹ 22 more variables: KS_intensity_pvalue <dbl>, GMM_cov_type <chr>,
## #   GMM_n_clust <dbl>, cluster_counts <chr>, Logit_LOR <chr>,
## #   c1_mean_intensity <dbl>, c2_mean_intensity <dbl>,
## #   c1_median_intensity <dbl>, c2_median_intensity <dbl>,
## #   c1_sd_intensity <dbl>, c2_sd_intensity <dbl>, c1_mean_dwell <dbl>,
## #   c2_mean_dwell <dbl>, c1_median_dwell <dbl>, c2_median_dwell <dbl>, …
# Per-position comparison results for the I experiment.
sampcomp_I <- read_tsv(
  paste0(
    wd,
    'Tables/DRS_sampcomp_results/siMETTL2A_I_sampcomp_results_2024-04-07.tsv.gz'
  )
)
## Rows: 3405923 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (10): transcript_id, ref_kmer, GMM_cov_type, cluster_counts, Logit_LOR, ...
## dbl (17): position, GMM_logit_pvalue, KS_dwell_pvalue, KS_intensity_pvalue, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sampcomp_I
## # A tibble: 3,405,923 × 27
##    position transcript_id     ref_kmer GMM_logit_pvalue KS_dwell_pvalue
##       <dbl> <chr>             <chr>               <dbl>           <dbl>
##  1     1036 ENST00000245046.7 GCACC               1.00             1.00
##  2     1037 ENST00000245046.7 CACCT               0.999            1.00
##  3     1039 ENST00000245046.7 CCTCT               0.999            1.00
##  4     1040 ENST00000245046.7 CTCTT               0.999            1.00
##  5     1041 ENST00000245046.7 TCTTG               0.999            1.00
##  6     1042 ENST00000245046.7 CTTGA               1.00             1.00
##  7     1043 ENST00000245046.7 TTGAA               0.999            1.00
##  8     1044 ENST00000245046.7 TGAAA              NA                1.00
##  9     1047 ENST00000245046.7 AATAA               1.00             1.00
## 10     1048 ENST00000245046.7 ATAAA               0.999            1.00
## # ℹ 3,405,913 more rows
## # ℹ 22 more variables: KS_intensity_pvalue <dbl>, GMM_cov_type <chr>,
## #   GMM_n_clust <dbl>, cluster_counts <chr>, Logit_LOR <chr>,
## #   c1_mean_intensity <dbl>, c2_mean_intensity <dbl>,
## #   c1_median_intensity <dbl>, c2_median_intensity <dbl>,
## #   c1_sd_intensity <dbl>, c2_sd_intensity <dbl>, c1_mean_dwell <dbl>,
## #   c2_mean_dwell <dbl>, c1_median_dwell <dbl>, c2_median_dwell <dbl>, …

Read annotation

# Transcript annotation, reduced to the transcript_* and gene_* columns plus
# seqname.
espresso_AsPC1_annotation <- read_tsv(
  paste0(wd, 'Tables/Espresso_AsPC1_annotation_cleaned_2024-03-29.tsv')
) |>
  select(starts_with('transcript_'), starts_with('gene_'), seqname)
## Rows: 36717 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (12): seqname, source, feature, score, strand, frame, gene_id, transcrip...
## dbl  (2): start, end
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Number of significant sites in each method

# G: number of positions per intensity_up / intensity_down combination.
sampcomp_G |>
  count(intensity_up, intensity_down)
## # A tibble: 3 × 3
##   intensity_up intensity_down       n
##   <chr>        <chr>            <int>
## 1 not          down               150
## 2 not          not            5880811
## 3 up           not               3043
# G: number of positions per dwell_up / dwell_down combination.
sampcomp_G |>
  count(dwell_up, dwell_down)
## # A tibble: 3 × 3
##   dwell_up dwell_down       n
##   <chr>    <chr>        <int>
## 1 not      down            40
## 2 not      not        5883947
## 3 up       not             17
# G: number of positions per GMM_change value (NA is kept as its own row).
sampcomp_G |>
  count(GMM_change)
## # A tibble: 3 × 2
##   GMM_change       n
##   <chr>        <int>
## 1 change         112
## 2 not        4122746
## 3 <NA>       1761146
# I: number of positions per intensity_up / intensity_down combination.
sampcomp_I |>
  count(intensity_up, intensity_down)
## # A tibble: 3 × 3
##   intensity_up intensity_down       n
##   <chr>        <chr>            <int>
## 1 not          down               292
## 2 not          not            3404487
## 3 up           not               1144
# I: number of positions per dwell_up / dwell_down combination.
sampcomp_I |>
  count(dwell_up, dwell_down)
## # A tibble: 3 × 3
##   dwell_up dwell_down       n
##   <chr>    <chr>        <int>
## 1 not      down            21
## 2 not      not        3405875
## 3 up       not             27
# I: number of positions per GMM_change value (NA is kept as its own row).
sampcomp_I |>
  count(GMM_change)
## # A tibble: 3 × 2
##   GMM_change       n
##   <chr>        <int>
## 1 change         240
## 2 not        2626931
## 3 <NA>        778752

Join results from G and I

# Combine the G and I results position by position, derive the overlap and
# middle-C annotations, then attach the transcript annotation.
#
# NOTE(review): this is a left join anchored on G, so positions tested only in
# I are dropped before the 'only I' classes are counted (the I table lists
# 1144 intensity_up sites, but only 605 + 535 = 1140 survive the join).
# Confirm that restricting to G positions is intended; otherwise use
# full_join().
sampcomp_results_joined <- left_join(
  sampcomp_G, sampcomp_I,
  by = join_by(position, transcript_id, ref_kmer),
  suffix = c('_G', '_I')
) |>
  add_commonchange_info() |>
  add_middle_Cinfo() |>
  add_consecutive_C_info() |>
  left_join(
    espresso_AsPC1_annotation,
    # Explicit key instead of an implicit natural join; the relationship
    # check fails loudly if a transcript has more than one annotation row,
    # which would otherwise silently duplicate positions.
    by = join_by(transcript_id),
    relationship = 'many-to-one'
  ) |>
  select(transcript_id, transcript_name, everything())
## Joining with `by = join_by(transcript_id)`
sampcomp_results_joined |> 
  export_tsv(outdir = tabledir, compression = 'gz')
## 
## Exported to: /Users/shuheimitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/DRS_m3C_sites/sampcomp_results_joined_2024-04-24.tsv.gz
## # A tibble: 5,884,004 × 67
##    transcript_id     transcript_name position ref_kmer GMM_logit_pvalue_G
##    <chr>             <chr>              <dbl> <chr>                 <dbl>
##  1 ENST00000264926.7 RAD18-201           1464 TCACA                    NA
##  2 ENST00000264926.7 RAD18-201           1465 CACAT                     1
##  3 ENST00000264926.7 RAD18-201           1466 ACATA                    NA
##  4 ENST00000264926.7 RAD18-201           1467 CATAA                     1
##  5 ENST00000264926.7 RAD18-201           1468 ATAAA                    NA
##  6 ENST00000264926.7 RAD18-201           1473 AACGA                     1
##  7 ENST00000264926.7 RAD18-201           1475 CGATC                    NA
##  8 ENST00000264926.7 RAD18-201           1486 ACACA                    NA
##  9 ENST00000264926.7 RAD18-201           1501 CAAGA                     1
## 10 ENST00000264926.7 RAD18-201           1502 AAGAC                    NA
## # ℹ 5,883,994 more rows
## # ℹ 62 more variables: KS_dwell_pvalue_G <dbl>, KS_intensity_pvalue_G <dbl>,
## #   GMM_cov_type_G <chr>, GMM_n_clust_G <dbl>, cluster_counts_G <chr>,
## #   Logit_LOR_G <chr>, c1_mean_intensity_G <dbl>, c2_mean_intensity_G <dbl>,
## #   c1_median_intensity_G <dbl>, c2_median_intensity_G <dbl>,
## #   c1_sd_intensity_G <dbl>, c2_sd_intensity_G <dbl>, c1_mean_dwell_G <dbl>,
## #   c2_mean_dwell_G <dbl>, c1_median_dwell_G <dbl>, c2_median_dwell_G <dbl>, …

Calculate commonly changed positions in each parameter

# One-row table: number of positions in each overlap class for intensity_up.
sampcomp_intensity_up <-
  sampcomp_results_joined |>
  count(intensity_up) |>
  mutate(params = 'intensity_up') |>
  pivot_wider(id_cols = params, names_from = intensity_up, values_from = n)
sampcomp_intensity_up
## # A tibble: 1 × 5
##   params       common `only G` `only I`  others
##   <chr>         <int>    <int>    <int>   <int>
## 1 intensity_up    605     2438      535 5880426
# Same one-row overlap-class table for each of the remaining parameters.
sampcomp_intensity_down <-
  sampcomp_results_joined |>
  count(intensity_down) |>
  mutate(params = 'intensity_down') |>
  pivot_wider(id_cols = params, names_from = intensity_down, values_from = n)

sampcomp_dwell_up <-
  sampcomp_results_joined |>
  count(dwell_up) |>
  mutate(params = 'dwell_up') |>
  pivot_wider(id_cols = params, names_from = dwell_up, values_from = n)

sampcomp_dwell_down <-
  sampcomp_results_joined |>
  count(dwell_down) |>
  mutate(params = 'dwell_down') |>
  pivot_wider(id_cols = params, names_from = dwell_down, values_from = n)

sampcomp_GMM_change <-
  sampcomp_results_joined |>
  count(GMM_change) |>
  mutate(params = 'GMM_change') |>
  pivot_wider(id_cols = params, names_from = GMM_change, values_from = n)
sampcomp_GMM_change
## # A tibble: 1 × 5
##   params     common `only G` `only I`  others
##   <chr>       <int>    <int>    <int>   <int>
## 1 GMM_change     32       80      208 5883684

Bind data of all parameters

# Stack the per-parameter tables, reshape to one row per (params, sig_in),
# then add percentages and the plotting level order.
sampcomp_sig_nums <-
  list(
    sampcomp_intensity_up, sampcomp_intensity_down,
    sampcomp_dwell_up, sampcomp_dwell_down, sampcomp_GMM_change
  ) |>
  bind_rows() |>
  pivot_longer(cols = !params, names_to = 'sig_in', values_to = 'n') |>
  calc_method_percentage() |>
  change_factor_levels()
sampcomp_sig_nums |> 
  export_tsv(outdir = tabledir, compression = 'gz')
## 
## Exported to: /Users/shuheimitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/DRS_m3C_sites/sampcomp_sig_nums_2024-04-24.tsv.gz
## # A tibble: 20 × 4
## # Groups:   params [5]
##    params         sig_in       n percentage
##    <fct>          <fct>    <int>      <dbl>
##  1 intensity_up   common     605   0.0103  
##  2 intensity_up   only G    2438   0.0414  
##  3 intensity_up   only I     535   0.00909 
##  4 intensity_up   others 5880426  99.9     
##  5 intensity_down common      21   0.000357
##  6 intensity_down only G     129   0.00219 
##  7 intensity_down only I     270   0.00459 
##  8 intensity_down others 5883584 100.      
##  9 dwell_up       common       6   0.000102
## 10 dwell_up       only G      11   0.000187
## 11 dwell_up       only I      21   0.000357
## 12 dwell_up       others 5883966 100.      
## 13 dwell_down     common       7   0.000119
## 14 dwell_down     only G      33   0.000561
## 15 dwell_down     only I      14   0.000238
## 16 dwell_down     others 5883950 100.      
## 17 GMM_change     common      32   0.000544
## 18 GMM_change     only G      80   0.00136 
## 19 GMM_change     only I     208   0.00354 
## 20 GMM_change     others 5883684 100.
# Horizontal bars: number of sites flagged in both experiments ('common'),
# one bar per parameter.
sampcomp_sig_nums_barplot <-
  sampcomp_sig_nums |>
  filter(sig_in == 'common') |>
  ggplot(aes(x = fct_rev(params), y = n, fill = params)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(
    values = c('#f23e3e', '#3e3ef2', '#f23ef2', '#3ef23e', 'gray30')
  )
# Earlier variants, kept for reference:
#   filter(sig_in != 'others')
#   scale_fill_manual(values = c('#A3A3F9', '#F23E3E', '#37D9CC'))
#   scale_fill_viridis_d()
sampcomp_sig_nums_barplot |>
  ggsave_multiple_formats(
    width = 5, height = 5, fontsize = 7, outdir = figdir
  )

C content

# Background distribution: percentage of every middle-C class over all joined
# positions. Labelled sig_in = 'common' so it passes the same filter as the
# per-parameter rows when plotted.
whole_kmer_c_percentage <-
  sampcomp_results_joined |>
  count(middleC_info) |>
  mutate(
    percentage = 100 * n / sum(n),
    params = 'whole_kmer'
  ) |>
  pivot_wider(
    id_cols = params, names_from = middleC_info, values_from = percentage
  ) |>
  mutate(sig_in = 'common')
whole_kmer_c_percentage
## # A tibble: 1 × 6
##   params        C1    C2    C3 others sig_in
##   <chr>      <dbl> <dbl> <dbl>  <dbl> <chr> 
## 1 whole_kmer  11.6  8.94  1.59   77.9 common
# Percentage of each middle-C class within every intensity_up overlap class,
# one row per class (sig_in). The result is ungrouped.
intensity_up_c_percentage <-
  sampcomp_results_joined |>
  count(intensity_up, middleC_info) |>
  mutate(percentage = 100 * n / sum(n), .by = intensity_up) |>
  mutate(params = 'intensity_up') |>
  rename(sig_in = intensity_up) |>
  pivot_wider(
    id_cols = c(params, sig_in),
    names_from = middleC_info, values_from = percentage,
    # A middle-C class with no sites in a category is 0 %, not missing.
    values_fill = 0
  )
intensity_up_c_percentage
## # A tibble: 4 × 6
## # Groups:   sig_in [4]
##   params       sig_in    C1    C2    C3 others
##   <chr>        <chr>  <dbl> <dbl> <dbl>  <dbl>
## 1 intensity_up common  7.60 40.8  32.4    19.2
## 2 intensity_up only G 14.0  31.7  16.8    37.5
## 3 intensity_up only I  7.10 39.8   8.79   44.3
## 4 intensity_up others 11.6   8.93  1.58   77.9
# Percentage of each middle-C class within every intensity_down overlap class,
# one row per class (sig_in). The result is ungrouped.
intensity_down_c_percentage <-
  sampcomp_results_joined |>
  count(intensity_down, middleC_info) |>
  mutate(percentage = 100 * n / sum(n), .by = intensity_down) |>
  mutate(params = 'intensity_down') |>
  rename(sig_in = intensity_down) |>
  pivot_wider(
    id_cols = c(params, sig_in),
    names_from = middleC_info, values_from = percentage,
    # A middle-C class with no sites in a category is 0 %, not missing.
    values_fill = 0
  )
intensity_down_c_percentage
## # A tibble: 4 × 6
## # Groups:   sig_in [4]
##   params         sig_in    C1    C2    C3 others
##   <chr>          <chr>  <dbl> <dbl> <dbl>  <dbl>
## 1 intensity_down common  19.0  4.76 9.52    66.7
## 2 intensity_down only G  10.9  4.65 0.775   83.7
## 3 intensity_down only I  12.6  4.81 0.370   82.2
## 4 intensity_down others  11.6  8.94 1.59    77.9
# Percentage of each middle-C class within every dwell_up overlap class,
# one row per class (sig_in). The result is ungrouped.
dwell_up_c_percentage <-
  sampcomp_results_joined |>
  count(dwell_up, middleC_info) |>
  mutate(percentage = 100 * n / sum(n), .by = dwell_up) |>
  mutate(params = 'dwell_up') |>
  rename(sig_in = dwell_up) |>
  pivot_wider(
    id_cols = c(params, sig_in),
    names_from = middleC_info, values_from = percentage,
    # Some classes have no sites here (e.g. no C2/C3 among 'common'); report
    # them as 0 % so they are not dropped as missing values when plotted.
    values_fill = 0
  )
dwell_up_c_percentage
## # A tibble: 4 × 6
## # Groups:   sig_in [4]
##   params   sig_in    C1 others    C2    C3
##   <chr>    <chr>  <dbl>  <dbl> <dbl> <dbl>
## 1 dwell_up common  33.3   66.7 NA    NA   
## 2 dwell_up only G  27.3   54.5  9.09  9.09
## 3 dwell_up only I  14.3   71.4  9.52  4.76
## 4 dwell_up others  11.6   77.9  8.94  1.59
# Percentage of each middle-C class within every dwell_down overlap class,
# one row per class (sig_in). The result is ungrouped.
dwell_down_c_percentage <-
  sampcomp_results_joined |>
  count(dwell_down, middleC_info) |>
  mutate(percentage = 100 * n / sum(n), .by = dwell_down) |>
  mutate(params = 'dwell_down') |>
  rename(sig_in = dwell_down) |>
  pivot_wider(
    id_cols = c(params, sig_in),
    names_from = middleC_info, values_from = percentage,
    # Some classes have no sites here (e.g. no C2 among 'common'); report
    # them as 0 % so they are not dropped as missing values when plotted.
    values_fill = 0
  )
dwell_down_c_percentage
## # A tibble: 4 × 6
## # Groups:   sig_in [4]
##   params     sig_in    C1    C3 others    C2
##   <chr>      <chr>  <dbl> <dbl>  <dbl> <dbl>
## 1 dwell_down common  14.3 28.6    57.1 NA   
## 2 dwell_down only G  12.1 21.2    45.5 21.2 
## 3 dwell_down only I  NA   NA      78.6 21.4 
## 4 dwell_down others  11.6  1.59   77.9  8.94
# Percentage of each middle-C class within every GMM_change overlap class,
# one row per class (sig_in). The result is ungrouped.
GMM_change_c_percentage <-
  sampcomp_results_joined |>
  count(GMM_change, middleC_info) |>
  mutate(percentage = 100 * n / sum(n), .by = GMM_change) |>
  mutate(params = 'GMM_change') |>
  rename(sig_in = GMM_change) |>
  pivot_wider(
    id_cols = c(params, sig_in),
    names_from = middleC_info, values_from = percentage,
    # A middle-C class with no sites in a category is 0 %, not missing.
    values_fill = 0
  )
GMM_change_c_percentage
## # A tibble: 4 × 6
## # Groups:   sig_in [4]
##   params     sig_in    C1    C2    C3 others
##   <chr>      <chr>  <dbl> <dbl> <dbl>  <dbl>
## 1 GMM_change common 25     3.12 12.5    59.4
## 2 GMM_change only G 10    21.2  10      58.8
## 3 GMM_change only I  7.21 15.4   1.92   75.5
## 4 GMM_change others 11.6   8.94  1.59   77.9
# Stack the per-parameter middle-C percentages with the whole-k-mer background.
c_percentage <-
  list(
    intensity_up_c_percentage, intensity_down_c_percentage,
    dwell_up_c_percentage, dwell_down_c_percentage,
    GMM_change_c_percentage, whole_kmer_c_percentage
  ) |>
  bind_rows()
c_percentage
## # A tibble: 21 × 6
## # Groups:   sig_in [4]
##    params         sig_in    C1    C2     C3 others
##    <chr>          <chr>  <dbl> <dbl>  <dbl>  <dbl>
##  1 intensity_up   common  7.60 40.8  32.4     19.2
##  2 intensity_up   only G 14.0  31.7  16.8     37.5
##  3 intensity_up   only I  7.10 39.8   8.79    44.3
##  4 intensity_up   others 11.6   8.93  1.58    77.9
##  5 intensity_down common 19.0   4.76  9.52    66.7
##  6 intensity_down only G 10.9   4.65  0.775   83.7
##  7 intensity_down only I 12.6   4.81  0.370   82.2
##  8 intensity_down others 11.6   8.94  1.59    77.9
##  9 dwell_up       common 33.3  NA    NA       66.7
## 10 dwell_up       only G 27.3   9.09  9.09    54.5
## # ℹ 11 more rows
# Display order of the parameters, with the whole-k-mer background last.
param_levels <- c(
  'intensity_up', 'intensity_down',
  'dwell_up', 'dwell_down',
  'GMM_change', 'whole_kmer'
)

# Long format: one row per (params, sig_in, middle-C class).
c_percentage_long <-
  c_percentage |>
  pivot_longer(
    cols = !c(params, sig_in),
    names_to = 'base',
    values_to = 'percentage'
  ) |>
  mutate(params = factor(params, levels = param_levels))
c_percentage_long |> 
  export_tsv(outdir = tabledir, compression = 'gz')
## 
## Exported to: /Users/shuheimitsutomi/Google Drive/My Drive/Analysis/METTL2A/Tables/DRS_m3C_sites/c_percentage_long_2024-04-24.tsv.gz
## # A tibble: 84 × 4
## # Groups:   sig_in [4]
##    params       sig_in base   percentage
##    <fct>        <chr>  <chr>       <dbl>
##  1 intensity_up common C1           7.60
##  2 intensity_up common C2          40.8 
##  3 intensity_up common C3          32.4 
##  4 intensity_up common others      19.2 
##  5 intensity_up only G C1          14.0 
##  6 intensity_up only G C2          31.7 
##  7 intensity_up only G C3          16.8 
##  8 intensity_up only G others      37.5 
##  9 intensity_up only I C1           7.10
## 10 intensity_up only I C2          39.8 
## # ℹ 74 more rows
# Stacked horizontal bars: middle-C class composition of the 'common' sites
# for every parameter, next to the whole-k-mer background.
c_percentage_barplot <-
  c_percentage_long |>
  filter(sig_in == 'common') |>
  mutate(base = factor(base, levels = c('C3', 'C2', 'C1', 'others'))) |>
  ggplot(aes(
    x = params |> fct_rev(), y = percentage,
    fill = base |> fct_rev())) +
  # `linewidth` replaces `size` for the bar outline (deprecated for lines
  # since ggplot2 3.4.0).
  geom_bar(stat = 'identity', colour = 'gray20', linewidth = 0.4) +
  scale_y_continuous(breaks = seq(0, 100, 20)) +
  scale_fill_manual(
    values = c('#BEBEBE', '#c5c5fb', '#7777F5', '#3131c1')
  ) +
  coord_flip()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
c_percentage_barplot |> 
  ggsave_multiple_formats(
    outdir = figdir, width = 4, height = 4, fontsize = 7
  )
## Warning: Removed 3 rows containing missing values (`position_stack()`).
## Warning: Removed 3 rows containing missing values (`position_stack()`).
## Removed 3 rows containing missing values (`position_stack()`).
## Removed 3 rows containing missing values (`position_stack()`).
## Removed 3 rows containing missing values (`position_stack()`).

Consecutive C information

Calculate percentage

# Share of k-mers with / without a CC around the centre, within each
# intensity_up overlap class (result stays grouped by intensity_up).
intensity_up_middleC2_percentage <-
  sampcomp_results_joined |>
  count(intensity_up, have_CC_middle) |>
  group_by(intensity_up) |>
  mutate(percentage = 100 * n / sum(n))
intensity_up_middleC2_percentage
## # A tibble: 8 × 4
## # Groups:   intensity_up [4]
##   intensity_up have_CC_middle       n percentage
##   <chr>        <chr>            <int>      <dbl>
## 1 common       no                 162       26.8
## 2 common       yes                443       73.2
## 3 only G       no                1255       51.5
## 4 only G       yes               1183       48.5
## 5 only I       no                 275       51.4
## 6 only I       yes                260       48.6
## 7 others       no             5262831       89.5
## 8 others       yes             617595       10.5
# Share of k-mers with / without a central CCC, within each intensity_up
# overlap class (result stays grouped by intensity_up).
intensity_up_middleC3_percentage <-
  sampcomp_results_joined |>
  count(intensity_up, have_C3_middle) |>
  group_by(intensity_up) |>
  mutate(percentage = 100 * n / sum(n))
intensity_up_middleC3_percentage
## # A tibble: 8 × 4
## # Groups:   intensity_up [4]
##   intensity_up have_C3_middle       n percentage
##   <chr>        <chr>            <int>      <dbl>
## 1 common       no                 409      67.6 
## 2 common       yes                196      32.4 
## 3 only G       no                2029      83.2 
## 4 only G       yes                409      16.8 
## 5 only I       no                 488      91.2 
## 6 only I       yes                 47       8.79
## 7 others       no             5787727      98.4 
## 8 others       yes              92699       1.58
# Share of every middle-C class (C3 / C2 / C1 / others) within each
# intensity_up overlap class (result stays grouped by intensity_up).
intensity_up_consecutive_C_percentage <-
  sampcomp_results_joined |>
  count(intensity_up, middleC_info) |>
  group_by(intensity_up) |>
  mutate(percentage = 100 * n / sum(n))
intensity_up_consecutive_C_percentage
## # A tibble: 16 × 4
## # Groups:   intensity_up [4]
##    intensity_up middleC_info       n percentage
##    <chr>        <chr>          <int>      <dbl>
##  1 common       C1                46       7.60
##  2 common       C2               247      40.8 
##  3 common       C3               196      32.4 
##  4 common       others           116      19.2 
##  5 only G       C1               341      14.0 
##  6 only G       C2               774      31.7 
##  7 only G       C3               409      16.8 
##  8 only G       others           914      37.5 
##  9 only I       C1                38       7.10
## 10 only I       C2               213      39.8 
## 11 only I       C3                47       8.79
## 12 only I       others           237      44.3 
## 13 others       C1            682348      11.6 
## 14 others       C2            524896       8.93
## 15 others       C3             92699       1.58
## 16 others       others       4580483      77.9
# Stacked horizontal bars (percentage axis reversed): fraction of k-mers with
# a CC around the centre in each intensity_up overlap class.
intensity_up_middleC2_percentage_barplot <-
  ggplot(
    intensity_up_middleC2_percentage,
    aes(
      x = fct_rev(intensity_up),
      y = percentage,
      fill = fct_rev(have_CC_middle)
    )
  ) +
  geom_col() +
  scale_y_reverse() +
  scale_fill_manual(values = c('#3E3EF2', '#BEBEBE')) +
  coord_flip()
intensity_up_middleC2_percentage_barplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 4, height = 3, fontsize = 7
  )

# Stacked horizontal bars: middle-C class composition of each intensity_up
# overlap class.
intensity_up_consecutive_C_percentage_barplot <-
  intensity_up_consecutive_C_percentage |>
  mutate(middleC_info = factor(middleC_info,
                               levels = c('C3', 'C2', 'C1', 'others'))) |>
  ggplot(aes(
    x = intensity_up |> fct_rev(), y = percentage,
    fill = middleC_info |> fct_rev()
  )) +
  # `linewidth` replaces `size` for the bar outline (deprecated for lines
  # since ggplot2 3.4.0).
  geom_bar(stat = 'identity', colour = 'gray20', linewidth = 0.4) +
  scale_fill_manual(
    values = c('#BEBEBE', '#c5c5fb', '#7777F5', '#3131c1')
  ) +
  coord_flip()
intensity_up_consecutive_C_percentage_barplot |>
  ggsave_multiple_formats(
    outdir = figdir, width = 4, height = 3.5, fontsize = 7
  )

Session info

sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.1 (2023-06-16)
##  os       macOS Sonoma 14.3
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Asia/Tokyo
##  date     2024-04-24
##  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  ! package      * version    date (UTC) lib source
##    abind          1.4-5      2016-07-21 [1] CRAN (R 4.3.0)
##    backports      1.4.1      2021-12-13 [1] CRAN (R 4.3.0)
##    bit            4.0.5      2022-11-15 [1] CRAN (R 4.3.0)
##    bit64          4.0.5      2020-08-30 [1] CRAN (R 4.3.0)
##    broom          1.0.5      2023-06-09 [1] CRAN (R 4.3.0)
##    bslib          0.5.1      2023-08-11 [1] CRAN (R 4.3.0)
##    cachem         1.0.8      2023-05-01 [1] CRAN (R 4.3.0)
##    callr          3.7.3      2022-11-02 [1] CRAN (R 4.3.0)
##    car            3.1-2      2023-03-30 [1] CRAN (R 4.3.0)
##    carData        3.0-5      2022-01-06 [1] CRAN (R 4.3.0)
##    class          7.3-22     2023-05-03 [1] CRAN (R 4.3.1)
##    cli            3.6.1      2023-03-23 [1] CRAN (R 4.3.0)
##    codetools      0.2-19     2023-02-01 [1] CRAN (R 4.3.1)
##    colorspace     2.1-0      2023-01-23 [1] CRAN (R 4.3.0)
##    crayon         1.5.2      2022-09-29 [1] CRAN (R 4.3.0)
##    data.table     1.14.8     2023-02-17 [1] CRAN (R 4.3.0)
##    desc           1.4.2      2022-09-08 [1] CRAN (R 4.3.0)
##    devtools       2.4.5      2022-10-11 [1] CRAN (R 4.3.0)
##    dials          1.2.0      2023-04-03 [1] CRAN (R 4.3.0)
##    DiceDesign     1.9        2021-02-13 [1] CRAN (R 4.3.0)
##    digest         0.6.33     2023-07-07 [1] CRAN (R 4.3.0)
##    dplyr        * 1.1.2      2023-04-20 [1] CRAN (R 4.3.0)
##    ellipsis       0.3.2      2021-04-29 [1] CRAN (R 4.3.0)
##    evaluate       0.21       2023-05-05 [1] CRAN (R 4.3.0)
##    fansi          1.0.4      2023-01-22 [1] CRAN (R 4.3.0)
##    farver         2.1.1      2022-07-06 [1] CRAN (R 4.3.0)
##    fastmap        1.1.1      2023-02-24 [1] CRAN (R 4.3.0)
##    forcats      * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
##    foreach        1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
##    fs             1.6.3      2023-07-20 [1] CRAN (R 4.3.0)
##    furrr          0.3.1      2022-08-15 [1] CRAN (R 4.3.0)
##    future         1.33.0     2023-07-01 [1] CRAN (R 4.3.0)
##    future.apply   1.11.0     2023-05-21 [1] CRAN (R 4.3.0)
##    generics       0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
##    ggforce        0.4.1      2022-10-04 [1] CRAN (R 4.3.0)
##    ggplot2      * 3.4.3      2023-08-14 [1] CRAN (R 4.3.0)
##    ggpubr         0.6.0      2023-02-10 [1] CRAN (R 4.3.0)
##    ggrepel        0.9.3      2023-02-03 [1] CRAN (R 4.3.0)
##    ggsignif       0.6.4      2022-10-13 [1] CRAN (R 4.3.0)
##    globals        0.16.2     2022-11-21 [1] CRAN (R 4.3.0)
##    glue           1.6.2      2022-02-24 [1] CRAN (R 4.3.0)
##    gower          1.0.1      2022-12-22 [1] CRAN (R 4.3.0)
##    GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.3.0)
##    gtable         0.3.3      2023-03-21 [1] CRAN (R 4.3.0)
##    hardhat        1.3.0      2023-03-30 [1] CRAN (R 4.3.0)
##    highr          0.10       2022-12-22 [1] CRAN (R 4.3.0)
##    hms            1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
##    htmltools      0.5.6      2023-08-10 [1] CRAN (R 4.3.0)
##    htmlwidgets    1.6.2      2023-03-17 [1] CRAN (R 4.3.0)
##    httpuv         1.6.11     2023-05-11 [1] CRAN (R 4.3.0)
##    ipred          0.9-14     2023-03-09 [1] CRAN (R 4.3.0)
##    iterators      1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
##    jquerylib      0.1.4      2021-04-26 [1] CRAN (R 4.3.0)
##    jsonlite       1.8.7      2023-06-29 [1] CRAN (R 4.3.0)
##    knitr          1.43       2023-05-25 [1] CRAN (R 4.3.0)
##    labeling       0.4.2      2020-10-20 [1] CRAN (R 4.3.0)
##    later          1.3.1      2023-05-02 [1] CRAN (R 4.3.0)
##    lattice        0.21-8     2023-04-05 [1] CRAN (R 4.3.1)
##    lava           1.7.2.1    2023-02-27 [1] CRAN (R 4.3.0)
##    lhs            1.1.6      2022-12-17 [1] CRAN (R 4.3.0)
##    lifecycle      1.0.3      2022-10-07 [1] CRAN (R 4.3.0)
##    listenv        0.9.0      2022-12-16 [1] CRAN (R 4.3.0)
##    lubridate    * 1.9.2      2023-02-10 [1] CRAN (R 4.3.0)
##    magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
##    MASS           7.3-60     2023-05-04 [1] CRAN (R 4.3.1)
##    Matrix         1.6-1      2023-08-14 [1] CRAN (R 4.3.0)
##    memoise        2.0.1      2021-11-26 [1] CRAN (R 4.3.0)
##    mime           0.12       2021-09-28 [1] CRAN (R 4.3.0)
##    miniUI         0.1.1.1    2018-05-18 [1] CRAN (R 4.3.0)
##    munsell        0.5.0      2018-06-12 [1] CRAN (R 4.3.0)
##  R myUtilities  * 0.0.0.9000 <NA>       [?] <NA>
##    nnet           7.3-19     2023-05-03 [1] CRAN (R 4.3.1)
##    parallelly     1.36.0     2023-05-26 [1] CRAN (R 4.3.0)
##    parsnip        1.1.1      2023-08-17 [1] CRAN (R 4.3.0)
##    pillar         1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
##    pkgbuild       1.4.2      2023-06-26 [1] CRAN (R 4.3.0)
##    pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
##    pkgload        1.3.2.1    2023-07-08 [1] CRAN (R 4.3.0)
##    polyclip       1.10-4     2022-10-20 [1] CRAN (R 4.3.0)
##    prettyunits    1.1.1      2020-01-24 [1] CRAN (R 4.3.0)
##    processx       3.8.2      2023-06-30 [1] CRAN (R 4.3.0)
##    prodlim        2023.08.28 2023-08-28 [1] CRAN (R 4.3.0)
##    profvis        0.3.8      2023-05-02 [1] CRAN (R 4.3.0)
##    promises       1.2.1      2023-08-10 [1] CRAN (R 4.3.0)
##    ps             1.7.5      2023-04-18 [1] CRAN (R 4.3.0)
##    purrr        * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
##    R6             2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
##    ragg           1.2.5      2023-01-12 [1] CRAN (R 4.3.0)
##    Rcpp           1.0.11     2023-07-06 [1] CRAN (R 4.3.0)
##    readr        * 2.1.4      2023-02-10 [1] CRAN (R 4.3.0)
##    recipes        1.0.8      2023-08-25 [1] CRAN (R 4.3.0)
##    remotes        2.4.2.1    2023-07-18 [1] CRAN (R 4.3.0)
##    rlang          1.1.1      2023-04-28 [1] CRAN (R 4.3.0)
##    rmarkdown      2.24       2023-08-14 [1] CRAN (R 4.3.0)
##    rpart          4.1.19     2022-10-21 [1] CRAN (R 4.3.1)
##    rprojroot      2.0.3      2022-04-02 [1] CRAN (R 4.3.0)
##    rsample        1.2.0      2023-08-23 [1] CRAN (R 4.3.0)
##    rstatix        0.7.2      2023-02-01 [1] CRAN (R 4.3.0)
##    rstudioapi     0.15.0     2023-07-07 [1] CRAN (R 4.3.0)
##    sass           0.4.7      2023-07-15 [1] CRAN (R 4.3.0)
##    scales         1.2.1      2022-08-20 [1] CRAN (R 4.3.0)
##    sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
##    shiny          1.7.5      2023-08-12 [1] CRAN (R 4.3.0)
##    stringi        1.7.12     2023-01-11 [1] CRAN (R 4.3.0)
##    stringr      * 1.5.0      2022-12-02 [1] CRAN (R 4.3.0)
##    survival       3.5-5      2023-03-12 [1] CRAN (R 4.3.1)
##    svglite        2.1.1      2023-01-10 [1] CRAN (R 4.3.0)
##    systemfonts    1.0.4      2022-02-11 [1] CRAN (R 4.3.0)
##    textshaping    0.3.6      2021-10-13 [1] CRAN (R 4.3.0)
##    tibble       * 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
##    tidyr        * 1.3.0      2023-01-24 [1] CRAN (R 4.3.0)
##    tidyselect     1.2.0      2022-10-10 [1] CRAN (R 4.3.0)
##    tidyverse    * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
##    timechange     0.2.0      2023-01-11 [1] CRAN (R 4.3.0)
##    timeDate       4022.108   2023-01-07 [1] CRAN (R 4.3.0)
##    tune           1.1.2      2023-08-23 [1] CRAN (R 4.3.0)
##    tweenr         2.0.2      2022-09-06 [1] CRAN (R 4.3.0)
##    tzdb           0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
##    urlchecker     1.0.1      2021-11-30 [1] CRAN (R 4.3.0)
##    usethis        2.2.2      2023-07-06 [1] CRAN (R 4.3.0)
##    utf8           1.2.3      2023-01-31 [1] CRAN (R 4.3.0)
##    vctrs          0.6.3      2023-06-14 [1] CRAN (R 4.3.0)
##    vroom          1.6.3      2023-04-28 [1] CRAN (R 4.3.0)
##    withr          2.5.0      2022-03-03 [1] CRAN (R 4.3.0)
##    workflows      1.1.3      2023-02-22 [1] CRAN (R 4.3.0)
##    xfun           0.40       2023-08-09 [1] CRAN (R 4.3.0)
##    xtable         1.8-4      2019-04-21 [1] CRAN (R 4.3.0)
##    yaml           2.3.7      2023-01-23 [1] CRAN (R 4.3.0)
##    yardstick      1.2.0      2023-04-21 [1] CRAN (R 4.3.0)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
## 
##  R ── Package was removed from disk.
## 
## ──────────────────────────────────────────────────────────────────────────────